package com.jason.photography.api.service.impl;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.date.StopWatch;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.RegexPool;
import cn.hutool.core.text.CharSequenceUtil;
import cn.hutool.core.util.ReUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.core.util.URLUtil;
import cn.hutool.http.HttpException;
import cn.hutool.http.HttpUtil;
import com.jason.common.core.exception.BizException;
import com.jason.common.http.service.OkHttpService;
import com.jason.common.service.exception.Assert;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.Serializable;
import java.net.URL;
import java.util.*;
import java.util.concurrent.CompletableFuture;

/**
 * Crawler for a single paginated photo-gallery page.
 *
 * <p>{@link #execute()} downloads the start page, extracts the author (text of the first
 * {@code a[rel]} element), the title (the {@code description} meta tag, falling back to the
 * {@code <title>} element), the links to the sibling pages of the same gallery and the image
 * links of every page. {@link #writeImgFile(String)} then downloads every collected image and
 * stores it under {@code <outputDir>/<author>/<title>/<n>.<type>}.
 *
 * <p>HTTP requests go through hutool's {@code HttpUtil} unless an {@link OkHttpService} has been
 * supplied via {@link #enableOkHttp(OkHttpService)}.
 *
 * @author gzc
 * @since 2024/3/4 3:16
 **/
@Slf4j
@Data
public class Spider implements Serializable {
    /** Optional HTTP client; when {@code null} hutool's {@code HttpUtil} is used instead. */
    private transient OkHttpService okHttpService;
    /** Start page URL as passed to the constructor. */
    private final String url;
    /** {@code protocol://host[:port]} of the start page; prefixed to site-relative links. */
    private final String baseUrl;
    /** Host part of the start page URL. */
    private final String domainName;
    /** Optional base URL for relative image links; {@link #baseUrl} is used when blank. */
    private String imgServerBaseUrl;

    private String title;
    /** Links to the sibling pages of the gallery found on the start page. */
    private transient final List<String> skipLinkList = new ArrayList<>(16);
    /** Image links found on the start page only. */
    private transient final List<String> imgLinkList = new ArrayList<>(16);
    /** Page index (0 = start page, 1..n = sibling pages in link order) to the data of that page. */
    private transient final Map<Integer, Data> map = new HashMap<>(256);
    /** Final image links of all pages, ordered by page index. */
    private List<String> imgList = new ArrayList<>(256);
    private String author;

    private transient final StopWatch stopWatch;

    /**
     * Creates a spider for the given start page.
     *
     * @param url start page URL
     * @throws BizException if {@code url} does not match hutool's {@code RegexPool.URI} pattern
     */
    public Spider(String url) {
        this.url = url;
        this.stopWatch = new StopWatch(this.url);
        if (!ReUtil.isMatch(RegexPool.URI, this.url)) {
            throw new BizException("网址 " + this.url + " 格式错误");
        }
        URL urlObj = URLUtil.url(this.url);
        this.domainName = urlObj.getHost();
        // Keep an explicit port: without it, relative links of a site that is served on a
        // non-default port would be resolved against the wrong origin.
        int port = urlObj.getPort();
        String authority = port == -1 ? this.domainName : this.domainName + ":" + port;
        this.baseUrl = urlObj.getProtocol() + "://" + authority;
    }

    /**
     * Crawls the start page and all sibling pages it links to, filling {@code author},
     * {@code title}, {@code skipLinkList}, {@code imgLinkList}, {@code map} and {@code imgList}.
     *
     * <p>A preset author (see {@link #setAuthor(String)}) is kept; otherwise it is read from the
     * page. Sibling pages are fetched in parallel; a failure of any of them propagates out of the
     * {@code join()} call.
     */
    public void execute() {
        try {
            Document document = getDocument(this.url);

            stopWatch.start("获取作者");
            if (StrUtil.isBlank(this.author)) {
                this.author = Assert.notBlank(getAuthor(document), "获取作者为空");
            }
            stopWatch.stop();

            stopWatch.start("获取标题");
            this.title = Assert.notBlank(getTitle(document), "获取标题为空");
            stopWatch.stop();

            stopWatch.start("获取所有a标签");
            List<String> pageLinks = getSkipLinkList(document);
            this.skipLinkList.addAll(pageLinks);
            stopWatch.stop();

            stopWatch.start("获取所有图片链接");
            List<String> firstPageImgLinks = getImgLinkList(document);
            stopWatch.stop();
            this.imgLinkList.addAll(firstPageImgLinks);
            this.map.put(0, newData(this.url, firstPageImgLinks));

            stopWatch.start("获取其他页面数据");
            List<CompletableFuture<Data>> pageFutures = new ArrayList<>(pageLinks.size());
            for (String pageLink : pageLinks) {
                String fullSkipUrl = this.baseUrl + pageLink;
                pageFutures.add(CompletableFuture.supplyAsync(() -> fetchPageData(fullSkipUrl)));
            }
            // The workers only return their result; the (non thread-safe) HashMap is written
            // exclusively from the calling thread.
            for (int i = 0; i < pageFutures.size(); i++) {
                this.map.put(i + 1, pageFutures.get(i).join());
            }
            stopWatch.stop();

            // Flatten the per-page image links in page order.
            List<String> orderedImgLinks = new TreeMap<>(this.map).values().stream()
                    .map(Data::getImgUrlList)
                    .flatMap(Collection::stream)
                    .toList();
            this.imgList.addAll(orderedImgLinks);
        } finally {
            // A step that threw would otherwise leave the stop watch running, which makes the
            // next start(...) call fail.
            if (stopWatch.isRunning()) {
                stopWatch.stop();
            }
        }
    }

    /** Downloads one sibling page and collects its image links. */
    private Data fetchPageData(String pageUrl) {
        return newData(pageUrl, getImgLinkList(getDocument(pageUrl)));
    }

    /** Builds the per-page record stored in {@link #map}. */
    private static Data newData(String pageUrl, List<String> imgUrls) {
        Data data = new Data();
        data.setUrl(pageUrl);
        data.setImgUrlList(imgUrls);
        return data;
    }

    /** Returns the text of the first {@code a[rel]} element, or an empty string if there is none. */
    private String getAuthor(Document document) {
        Elements relAnchors = document.select("a[rel]");
        return CollUtil.isEmpty(relAnchors) ? "" : relAnchors.get(0).text();
    }

    /**
     * Downloads {@code url} and parses the response with Jsoup.
     *
     * @throws BizException if Jsoup fails to parse the response
     */
    private Document getDocument(String url) {
        String htmlStr = okHttpService == null ? HttpUtil.get(url) : okHttpService.doGet("获取网页html结构", url);
        try {
            return Jsoup.parse(htmlStr);
        } catch (Exception e) {
            throw new BizException("Jsoup解析html发生异常", e);
        }
    }

    /** Returns the distinct {@code img[src]} values of the document that pass {@link #filterImgLinkStr}. */
    private List<String> getImgLinkList(Document document) {
        return document.select("img[src]").stream()
                .map(img -> filterImgLinkStr(img.attr("src")))
                .filter(StrUtil::isNotBlank)
                .distinct()
                .toList();
    }

    /**
     * Keeps an image link only if it contains {@code uploadfile} and does not contain
     * {@code /pic/} (both case-insensitive); returns an empty string otherwise.
     */
    private String filterImgLinkStr(String src) {
        boolean isUpload = StrUtil.containsIgnoreCase(src, "uploadfile");
        boolean isPic = StrUtil.containsIgnoreCase(src, "/pic/");
        return isUpload && !isPic ? src : "";
    }

    /** Returns the distinct {@code a[href]} values of the document that point to sibling pages. */
    private List<String> getSkipLinkList(Document document) {
        Assert.notNull(document, "document为空");
        // Path of the start page without the ".html" suffix; computed once instead of per link.
        String pagePathPrefix = URLUtil.url(this.url).getPath().replace(".html", "");
        return document.select("a[href]").stream()
                .map(anchor -> filterSkipLinkStr(anchor.attr("href"), pagePathPrefix))
                .filter(StrUtil::isNotBlank)
                .distinct()
                .toList();
    }

    /**
     * Keeps a link only if it is a site-relative {@code .html} link of the form
     * {@code <pagePathPrefix>_<digits>.html}; returns an empty string otherwise.
     *
     * @param href           raw {@code href} attribute value
     * @param pagePathPrefix path of the start page with {@code .html} removed
     */
    private String filterSkipLinkStr(String href, String pagePathPrefix) {
        // Reject absolute and javascript links. The subject of the check must be href itself.
        if (CharSequenceUtil.containsAny(href, "https://", "http://", "javascript")) {
            return "";
        }
        if (!href.contains(".html") || !href.contains(pagePathPrefix)) {
            return "";
        }
        String hrefWithoutSuffix = href.replace(".html", "");
        String siblingPrefix = pagePathPrefix + "_";
        if (!hrefWithoutSuffix.contains(siblingPrefix)) {
            return "";
        }
        // What remains after removing the prefix must be the page number.
        String pageNo = hrefWithoutSuffix.replace(siblingPrefix, "");
        return ReUtil.isMatch(RegexPool.NUMBERS, pageNo) ? href : "";
    }

    /**
     * Reads the title from the {@code description} meta tag, falling back to the first non-blank
     * {@code <title>} element. Everything up to and including the first {@code ]} is stripped.
     *
     * @return the title, or an empty string if none was found
     */
    private String getTitle(Document document) {
        Assert.notNull(document, "document为空");
        String title = "";
        for (Element metaTag : document.select("meta")) {
            if ("description".equals(metaTag.attr("name"))) {
                title = metaTag.attr("content");
                break;
            }
        }
        if (StrUtil.isBlank(title)) {
            for (Element titleTag : document.select("title")) {
                if (StrUtil.isNotBlank(titleTag.text())) {
                    title = titleTag.text();
                    break;
                }
            }
        }
        if (StrUtil.isNotBlank(title)) {
            title = title.substring(title.indexOf("]") + 1);
        }
        return title;
    }

    /**
     * Downloads every image in {@code imgList} in parallel and writes it to
     * {@code <outputImgFileDir>/<author>/<title>/<n>.<type>}, where {@code n} is the 1-based
     * position of the image. Images answered with a 404 are logged and skipped. If any other
     * download or write fails, the target directory is deleted again.
     *
     * @param outputImgFileDir root directory for the downloaded images
     * @return number of entries in {@code imgList} (0 when the list is {@code null} or empty)
     * @throws BizException if the directory is invalid or cannot be created, or if downloading or
     *                      saving an image fails
     */
    public int writeImgFile(String outputImgFileDir) {
        Assert.notBlank(outputImgFileDir, "图片保存的本地文件夹为空");
        // NOTE(review): a local directory is validated against hutool's URL pattern here, which
        // presumably only accepts values shaped like "scheme://..." (e.g. "D://photos") -
        // confirm that this is the intended path format.
        if (!ReUtil.isMatch(RegexPool.URL, outputImgFileDir)) {
            throw new BizException("图片保存路径->" + outputImgFileDir + " 格式不正确");
        }
        if (CollUtil.isEmpty(this.imgList)) {
            return 0;
        }

        String authorDirName = StrUtil.isBlank(this.author) ? this.url : this.author;
        String titleDirName = StrUtil.isBlank(this.title) ? this.url : this.title;
        String dir = outputImgFileDir + File.separator + authorDirName + File.separator + titleDirName;
        File targetDir = FileUtil.mkdir(dir);
        if (!targetDir.exists()) {
            throw new BizException("创建文件夹失败");
        }

        List<CompletableFuture<Void>> downloads = new ArrayList<>(this.imgList.size());
        try {
            for (int i = 0; i < this.imgList.size(); i++) {
                int imgNo = i + 1;
                String imgUrl = this.imgList.get(i);
                downloads.add(CompletableFuture.runAsync(() -> downloadAndSaveImg(dir, imgNo, imgUrl)));
            }
            // allOf waits for every download to finish before reporting a failure, so the
            // cleanup below does not race with downloads that are still writing files.
            CompletableFuture.allOf(downloads.toArray(CompletableFuture<?>[]::new)).join();
        } catch (Exception e) {
            if (!FileUtil.del(targetDir)) {
                throw new BizException("删除保存图片失败的文件夹失败", e);
            }
            throw new BizException("下载图片并保存本地磁盘发生异常", e);
        }
        return this.imgList.size();
    }

    /**
     * Downloads one image and writes it to {@code <dir>/<imgNo>.<type>}; a 404 response is logged
     * and skipped.
     *
     * @throws BizException if the download or the write fails for any other reason
     */
    private void downloadAndSaveImg(String dir, int imgNo, String imgUrl) {
        String fullImgUrl = "";
        try {
            fullImgUrl = resolveImgUrl(imgUrl);
            byte[] imgFileBytes;
            try {
                imgFileBytes = okHttpService == null ? HttpUtil.downloadBytes(fullImgUrl) : okHttpService.download(fullImgUrl);
            } catch (HttpException e) {
                // Null-safe: the exception message may be absent.
                if (StrUtil.contains(e.getMessage(), "404")) {
                    log.error("本地目录->{},第{}张图片, 图片下载地址->{}，发生错误，404未找到资源", dir, imgNo, fullImgUrl);
                    return;
                }
                throw e;
            }
            Assert.notNullElements(imgFileBytes, "图片地址->{}, 下载图片数据为空", fullImgUrl);
            // The file extension is the subtype of the MIME type guessed from the URL.
            String mimeType = Assert.notBlank(FileUtil.getMimeType(fullImgUrl), "获取图片格式失败");
            String imgType = mimeType.split("/")[1];
            String fullImgFilePath = dir + File.separator + imgNo + "." + imgType;
            File file = FileUtil.writeBytes(imgFileBytes, fullImgFilePath);
            Assert.isTrue(file.exists(), "图片网址->{}，图片保存本地地址->{}，保存图片到本地磁盘失败", fullImgUrl, fullImgFilePath);
        } catch (Exception e) {
            log.error("图片网址->{}发生异常->{}", fullImgUrl, e.getMessage(), e);
            throw new BizException("下载文件，保存文件发生异常", e);
        }
    }

    /**
     * Returns {@code imgUrl} unchanged when it already contains a scheme, otherwise prefixes it
     * with {@code imgServerBaseUrl} (or {@code baseUrl} when that is blank).
     */
    private String resolveImgUrl(String imgUrl) {
        if (imgUrl.contains("https://") || imgUrl.contains("http://")) {
            return imgUrl;
        }
        String imgBaseUrl = StrUtil.isNotBlank(this.imgServerBaseUrl) ? this.imgServerBaseUrl : this.baseUrl;
        return imgBaseUrl + imgUrl;
    }


    /** Image links collected from one page of the gallery. */
    @lombok.Data
    public static class Data {
        private String url;
        private List<String> imgUrlList;
    }

    public void setImgServerBaseUrl(String imgServerBaseUrl) {
        this.imgServerBaseUrl = imgServerBaseUrl;
    }

    /** Routes all subsequent HTTP requests through the given service instead of hutool's {@code HttpUtil}. */
    public void enableOkHttp(OkHttpService okHttpService) {
        this.okHttpService = okHttpService;
    }

    public String getUrl() {
        return url;
    }

    public String getImgServerBaseUrl() {
        return imgServerBaseUrl;
    }

    public String getTitle() {
        return title;
    }

    public List<String> getSkipLinkList() {
        return skipLinkList;
    }

    public List<String> getImgLinkList() {
        return imgLinkList;
    }

    public String getBaseUrl() {
        return baseUrl;
    }

    public String getDomainName() {
        return domainName;
    }

    public Map<Integer, Data> getMap() {
        return map;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public List<String> getImgList() {
        return imgList;
    }

    public void setImgList(List<String> imgList) {
        this.imgList = imgList;
    }
}
